# -*- coding: utf-8 -*-
r"""
Live Documentation in the Notebook

Conversion of HTML (output by Sphinx or docutils) to Sage worksheet txt
file.

This takes an HTML document, i.e., Sage documentation, and returns it in
the editable format (notebook worksheet format with evaluable examples). It
also returns a string representing the CSS link for the document.  The SGML
parser is set up to return only the body of the HTML documentation page and
to re-format Sage examples and type-setting.

This module contains three classes:

- :class:`sagenb.notebook.docHTMLProcessor.genericHTMLProcessor`:
  gathers all the common methods of the other two classes.

- :class:`sagenb.notebook.docHTMLProcessor.SphinxHTMLProcessor`:
  translates HTML file generated by Sphinx into a worksheet text file

- :class:`sagenb.notebook.docHTMLProcessor.docutilsHTMLProcessor`:
  translates HTML file generated by docutils ``rst2html`` command into a
  worksheet text file

.. NOTE:: 

    This extension of sgmllib.SGMLParser was partly inspired by Mark
    Pilgrim's 'Dive Into Python' examples.

AUTHORS:

- Dorian Raymer (2006): first version

- William Stein (2007-06-10): rewrite to work with twisted Sage notebook

- Mike Hansen (2008-09-27): Rewrite to work with Sphinx HTML documentation

- Sebastien Labbe (2011-01-15): Added a new class named
  docutilsHTMLProcessor used for translating the html output of the
  rst2html docutils command run on a rst file into worksheet text file.
  Also added a new class named genericHTMLProcessor which gathers the
  common methods of both docutilsHTMLProcessor and SphinxHTMLProcessor
  classes. Added lots of doctests to make its coverage 100% doctested.

EXAMPLES:

Process the output of docutils ``rst2html`` command::

    sage: rst = ""
    sage: rst += "Additions in Sage\n"
    sage: rst += "-----------------\n"
    sage: rst += "\n"
    sage: rst += "Let's do easy computations with Sage::\n"
    sage: rst += "\n"
    sage: rst += "    s" + "age: 4 + 3\n"
    sage: rst += "    7\n"
    sage: rst += "    s" + "age: 1 - 2\n"
    sage: rst += "    -1\n"
    sage: rst += "\n"
    sage: rst += "Let's do `x^2`::\n"
    sage: rst += "\n"
    sage: rst += "    s" + "age: x^2\n"
    sage: rst += "    x^2\n"
    sage: from docutils.core import publish_string
    sage: html = publish_string(rst, writer_name='html')
    sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
    sage: p = docutilsHTMLProcessor()
    sage: txt = p.process_doc_html(html)
    sage: len(txt)
    191
    sage: print txt
    <h1 class="title">Additions in Sage</h1>
    <BLANKLINE>
    <BLANKLINE>
    <BLANKLINE>
    <p>Let's do easy computations with Sage:</p>
    <BLANKLINE>
    {{{id=0|
    4 + 3
    ///
    7
    }}}
    <BLANKLINE>
    {{{id=1|
    1 - 2
    ///
    -1
    }}}
    <BLANKLINE>
    <p>Let's do $x^2$:</p>
    <BLANKLINE>
    {{{id=2|
    x^2
    ///
    x^2
    }}}
    <BLANKLINE>
    <BLANKLINE>
"""
#############################################################################
#       Copyright (C) 2007 William Stein <wstein@gmail.com> and Dorian Raimer
#       Copyright (C) 2011 Sebastien Labbe <slabqc at gmail.com> 
#  Distributed under the terms of the GNU General Public License (GPL)
#  The full text of the GPL is available at:
#                  http://www.gnu.org/licenses/
#############################################################################

from sgmllib import SGMLParser
from urllib import splittag
from htmlentitydefs import entitydefs

class genericHTMLProcessor(SGMLParser):
    r"""
    This class gathers the methods that are common to both classes
    :class:`sagenb.notebook.docHTMLProcessor.SphinxHTMLProcessor` and
    :class:`sagenb.notebook.docHTMLProcessor.docutilsHTMLProcessor`.
    """
    def process_doc_html(self, doc_in):
        r"""
        Returns processed HTML input as HTML output.  This is the only
        method that needs to be called externally.

        INPUT:

        - ``doc_in`` - a string containing properly formed HTML

        OUTPUT:

        - a string; the processed HTML

        EXAMPLES::

            sage: rst = ""
            sage: rst += "Title\n"
            sage: rst += "-----\n"
            sage: rst += "n"
            sage: rst += "Some text\n"
            sage: from docutils.core import publish_string
            sage: html = publish_string(rst, writer_name='html')
            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: txt = p.process_doc_html(html)
            sage: len(txt)
            51
            sage: txt
            '<h1 class="title">Title</h1>\n\n<p>nSome text</p>\n\n\n\n'

        """        
        # self.feed() is a SGMLParser method and starts everything
        # off; Most of the functions here are extensions to
        # SGMLParser, and may never actually be visibly called here.
        self.feed(doc_in) #SGMLParser call
        self.close()     #SGMLParser call
        self.hand_off_temp_pieces('to_doc_pieces')
        return self.all_pieces


    def hand_off_temp_pieces(self, piece_type):
        r"""
        To separate the documentation's content from the Sage
        examples, everything is split into one of two cell types.
        This method puts the current ``self.temp_pieces`` into
        ``self.all_pieces``.

        INPUT:

        - ``piece_type`` - a string; indicates the type of and how to
          process the current ``self.temp_pieces``. It can be one of the
          following:

          - ``"to_doc_pieces"`` - put temp_pieces in all_pieces
          - ``"ignore"`` - delete temp_pieces
          - ``"to_cell_pieces"`` - translate temp_pieces into cells and put
            it in all_pieces

        EXAMPLES:

        Move temporary pieces to all pieces::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('to_doc_pieces')
            sage: p.all_pieces
            'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []

        Ignore temporary pieces::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('ignore')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            []

        Translate temporary pieces (starting with sage prompt) into cells::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['sage'+': 4+4\n', '8\n', 'sage'+': 9-4\n', '5\n']
            sage: p.hand_off_temp_pieces('to_cell_pieces')
            sage: print p.all_pieces
            a lot of stuff done
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            9-4
            ///
            5
            }}}
            sage: p.temp_pieces
            []

        Translate temporary pieces (not starting with sage prompt) into cells::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.hand_off_temp_pieces('to_cell_pieces')
            sage: print p.all_pieces
            a lot of stuff done <pre class="literal-block">
            bunch of tmp strings
            </pre>
            sage: p.temp_pieces
            []

        """
        pieces = "".join(self.temp_pieces)
        pieces = pieces.lstrip()
        if piece_type == 'to_doc_pieces':
            self.all_pieces += pieces
            self.temp_pieces = []
        elif piece_type == 'ignore':
            self.temp_pieces = []
        elif piece_type == 'to_cell_pieces':
            pieces = self.process_cell_input_output(pieces)
            self.all_pieces += pieces
            self.temp_pieces = []
        else:
            raise ValueError('unknown piece_type(=%s)' % piece_type)

    def get_cellcount(self):
        r"""
        Return the current cell count and increment it by one.

        OUTPUT:

        - an int

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor()
            sage: d.get_cellcount()
            0
            sage: d.get_cellcount()
            1

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()
            sage: d.get_cellcount()
            0
            sage: d.get_cellcount()
            1
        """
        self.cellcount += 1
        return self.cellcount - 1

    def process_cell_input_output(self, cell_piece):
        r"""
        Process and return a ``cell_piece``.

        All divs with CSS class="highlight" (if generated with Sphinx)  or
        class="literal-block" (if generated with docutils) contain code
        examples.  They include

        - Models of how the function works.  These begin with, e.g.,
          'INPUT:' and are re-styled as divs with
          class="usage_model".

        - Actual Sage input and output.  These begin with 'sage:'.
          The input and output are separated according to the
          Notebook edit format.

        INPUT:

        - ``cell_piece`` - a string; a cell piece

        OUTPUT:

        - a string; the processed cell piece

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: s = "s" + "age: 4 + 4\n8"    # avoid the doctest script to parse "sage:"
            sage: p.process_cell_input_output(s)
            '\n{{{id=0|\n4 + 4\n///\n8\n}}}\n\n'
            sage: print p.process_cell_input_output(s)
            {{{id=1|
            4 + 4
            ///
            8
            }}}

        ::

            sage: s = "age: 4 + 4\n8"
            sage: print p.process_cell_input_output(s)
            <pre class="literal-block">
            age: 4 + 4
            8
            </pre>

        ::

            sage: s = '&gt;'*3 + " 4 + 4\n8"
            sage: print p.process_cell_input_output(s)
            {{{id=2|
            4 + 4
            ///
            8
            }}}

        ::

            sage: s = "s" + "age: 4 + 4\n8\ns" + "age: 2 + 2\n4"
            sage: print p.process_cell_input_output(s)
            {{{id=3|
            4 + 4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=4|
            2 + 2
            ///
            4
            }}}
        """
        if cell_piece[:5] != 'sage:' and cell_piece[:12] != '&gt;'*3:
            piece = self.false_positive_input_output_cell(cell_piece)
        else:
            # group and format inputs and outputs
            pieces = cell_piece.split('\n')
            output_flag = False
            piece = '\n{{{id=%s|\n'%self.get_cellcount()
            for p in pieces:

                if p[:5] == 'sage:' and not output_flag:
                    piece += p[5:].lstrip() + '\n'
                elif p[:5] == 'sage:' and output_flag:
                    piece += '\n}}}\n\n{{{id=%s|\n'%self.get_cellcount() + p[5:].lstrip() + '\n'
                    output_flag = False
                elif p[:12] == '&gt;'*3 and not output_flag:
                    piece += p[12:].lstrip() + '\n'
                elif p[:12] == '&gt;'*3 and output_flag:
                    piece += '\n}}}\n\n{{{id=%s|\n'%self.get_cellcount() + p[12:].lstrip() + '\n'
                    output_flag = False
                elif p[:3] == '...':
                    piece += p[3:] + '\n'
                else:
                    # in an output string. replace escaped html
                    # strings so they don't get converted twice.
                    p = p.replace('&lt;', '<')
                    p = p.replace('&gt;', '>')
                    p = p.replace('&amp;', '&')
                    p = p.replace('&#39;', "'")
                    # first occurrence of an output string
                    # write /// denoting output
                    if output_flag == False:
                        piece += '///'
                        if p:
                            piece += '\n' + p
                        output_flag = True
                    # multiple output lines exist, don't need /// repeated
                    else:
                        piece += p
            piece += '\n}}}\n\n'
        return piece
                
    ##############################################
    ## General tag handlers
    ## These just append their HTML to self.temp_pieces.
    def unknown_starttag(self, tag, attrs):
        r"""
        INPUT:

        - ``tag`` - string
        - ``attrs`` - list of tuples

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: tag = 'style'
            sage: attrs = [('type', 'text/css')]
            sage: p.unknown_starttag(tag, attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<style type="text/css">']
        """
        if self.keep_data:
            strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
            self.temp_pieces.append("<%(tag)s%(strattrs)s>" % locals())

    def unknown_endtag(self, tag):
        r"""
        INPUT:

        - ``tag`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.unknown_endtag('head')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '</head>'] 
        """
        if self.keep_data:
            self.temp_pieces.append("</%(tag)s>" % locals())

    def handle_data(self, data):
        r"""
        INPUT:

        - ``data`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_data('some important data')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', 'some important data']
        """
        if self.keep_data:
            self.temp_pieces.append(data)
    def handle_charref(self, ref):
        r"""
        INPUT:

        - ``ref`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_charref('160')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '&#160;']
        """
        if self.keep_data:
            self.temp_pieces.append("&#%(ref)s;" % locals())

    def handle_entityref(self, ref):
        r"""
        INPUT:

        - ``ref`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_entityref('160')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '&160']
        """
        if self.keep_data:
            self.temp_pieces.append("&%(ref)s" % locals())
            if entitydefs.has_key(ref):
                self.temp_pieces.append(';')
    def handle_comment(self, data):             
        r"""
        INPUT:

        - ``data`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_comment('important comment')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<!--important comment-->'] 
        """
        if self.keep_data:
            self.temp_pieces.append("<!--%(data)s-->" % locals())
    def handle_pi(self, text):
        r"""
        Handle processing instructions

        INPUT:

        - ``text`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_pi('instructions')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<?instructions>'] 
        """
        if self.keep_data:
            self.temp_pieces.append("<?%(text)s>" % locals())

    def handle_decl(self, text):
        r"""
        INPUT:

        - ``data`` - string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.handle_decl('declaration')
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<!declaration>'] 
        """
        if self.keep_data:
            self.temp_pieces.append("<!%(text)s>" % locals())
        
    ##############################################
    ## Specific tag handlers
    def start_body(self, attrs):
        r"""
        Set ``self.keep_data`` to True upon finding the opening body tag.

        From this point on the generic handlers of this class start
        appending what they see to ``self.temp_pieces``.  The body tag
        itself is not recorded.

        INPUT:

        - ``attrs`` - the attributes of the body element; not used by
          this method

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()
            sage: d.keep_data
            False
            sage: d.start_body(None)
            sage: d.keep_data
            True

        ::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor()
            sage: d.keep_data
            False
            sage: d.start_body(None)
            sage: d.keep_data
            True
        """
        self.keep_data = True

    def end_body(self):
        r"""
        Do nothing upon finding the closing body tag.

        Neither ``self.all_pieces`` nor ``self.temp_pieces`` is modified.
        Presumably this handler exists so that the closing body tag does
        not reach :meth:`unknown_endtag` and end up in the output.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_body()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 
        """
        pass
    def end_html(self):
        r"""
        Do nothing upon finding the closing html tag.

        This method takes no input.  Neither ``self.all_pieces`` nor
        ``self.temp_pieces`` is modified.  Presumably this handler exists
        so that the closing html tag does not reach
        :meth:`unknown_endtag` and end up in the output.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_html()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 
        """
        pass

class SphinxHTMLProcessor(genericHTMLProcessor):
    def reset(self):
        r"""
        Initialize necessary variables.  Called by
        :meth:`SGMLParser.__init__`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: d = SphinxHTMLProcessor()    #indirect doctest
            sage: d.keep_data
            False
            sage: d.in_highlight_div
            False
            sage: d.temp_pieces
            []
            sage: d.all_pieces
            ''
            sage: d.cellcount
            0
        """
        # flags
        self.keep_data = False #don't keep anything before the <body> tag
        self.in_highlight_div = False

        # lists of what the parser keeps
        self.temp_pieces = []
        self.all_pieces = ''

        # counters
        self.cellcount = 0
                
        SGMLParser.reset(self)

    def false_positive_input_output_cell(self, cell_piece):
        r"""
        Wrap a code block that is not an example session back into a
        highlighted ``<div><pre>`` block.

        A false positive input-output cell comes from a block of code
        which doesn't start with the sage prompt string 'sage:' or the
        Python prompt '>>>'.  Its text is kept, except that every brace
        is followed by ``&nbsp;``.

        INPUT:

        - ``cell_piece`` - string, a cell piece

        OUTPUT:

        - a string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: s = "sage -rst2html -h"
            sage: print p.false_positive_input_output_cell(s)
            <div class="highlight"><pre>
            sage -rst2html -h
            </pre></div>

        """
        spaced = cell_piece.replace('{', '{&nbsp;').replace('}', '}&nbsp;')
        return '<div class="highlight"><pre>\n' + spaced + '\n</pre></div>'

    #############################################
    ## Specific tag handlers
    ##
    def start_div(self, attrs):
        r"""
        Find out if we are starting a highlighted div.

        A div having an attribute ``class`` equal to ``highlight``
        (compared case-insensitively) opens a highlighted block: the
        pieces gathered so far are handed off as documentation, the flag
        ``self.in_highlight_div`` is set and the tag itself is ignored.
        Any other div is treated like an unknown opening tag.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'highlight')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []
            sage: p.in_highlight_div
            True

        ::

            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<div class="something-else">']
            sage: p.in_highlight_div
            False
        """
        opens_highlight = any(
            name.lower() == 'class' and value.lower() == 'highlight'
            for name, value in attrs)
        if opens_highlight:
            self.in_highlight_div = True
            self.hand_off_temp_pieces('to_doc_pieces')
        else:
            self.unknown_starttag('div', attrs)

    def end_div(self):
        r"""
        Once we end the highlighted div, convert all of the pieces
        to cells.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings\n']
            sage: p.keep_data = True
            sage: attrs = [('class', 'highlight')]
            sage: p.start_div(attrs)
            sage: p.start_pre([])
            sage: sprompt = 'sa' + 'ge' + ': '    # to avoid problems with doctest script
            sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt))
            sage: p.end_pre()
            sage: p.end_div()
            sage: print p.all_pieces
            a lot of stuff done bunch of tmp strings
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            x^2
            ///
            x^2
            }}}
            sage: p.temp_pieces
            [] 
            sage: p.in_highlight_div
            False

        ::

            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_div(attrs)
            sage: p.handle_data('some data')
            sage: p.end_div()
            sage: print p.all_pieces
            a lot of stuff done
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<div class="something-else">', 'some data', '</div>']
            sage: p.in_highlight_div
            False

        """
        if self.in_highlight_div:
            self.in_highlight_div = False
            self.hand_off_temp_pieces('to_cell_pieces')
            return
        self.temp_pieces.append("</div>")
    
    def start_pre(self, attrs):
        r"""
        Ignore tag <pre> when inside highligh div.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: attrs = []
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: attrs = []
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<pre>'] 
        """
        if self.in_highlight_div:
            return
        self.unknown_starttag('pre',attrs)

    def end_pre(self):
        r"""
        Ignore tag </pre> when inside highligh div.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: p.end_pre()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 

        ::

            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: p.end_pre()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '</pre>'] 
        """
        if self.in_highlight_div:
            return
        self.unknown_endtag('pre')

    #Ignore forms
    def start_form(self, attrs):
        r"""
        Hand off, as documentation, everything accumulated before the
        form.

        Forms are ignored: the opening tag is not recorded, and flushing
        the buffer here means that only the content of the form is left
        in it when the form ends.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = []
            sage: p.start_form(attrs)
            sage: p.all_pieces
            'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []
        """
        self.hand_off_temp_pieces('to_doc_pieces')

    def end_form(self):
        r"""
        Throw away every piece buffered since the form started.

        ``self.all_pieces`` is left as it is and ``self.temp_pieces`` is
        emptied.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_form()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            []
        """
        self.hand_off_temp_pieces('ignore')

    def start_span(self, attrs):
        r"""
        Ignore all spans that occur within highlighted blocks

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: attrs = []
            sage: p.start_span(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 

        ::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: attrs = []
            sage: p.start_span(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<span>'] 
        """
        if self.in_highlight_div:
            return
        self.unknown_starttag('span', attrs)
    def end_span(self):
        r"""
        Ignore all spans that occur within highlighted blocks

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import SphinxHTMLProcessor
            sage: p = SphinxHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = True
            sage: p.end_span()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings'] 

        ::

            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.in_highlight_div = False
            sage: p.end_span()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '</span>'] 
        """
        if self.in_highlight_div:
            return        
        self.unknown_endtag('span')

class docutilsHTMLProcessor(genericHTMLProcessor):
    r"""
    Translates output of the docutils parser rst2html into notebook text.

    EXAMPLES::

        sage: rst = ""
        sage: rst += "Additions in Sage\n"
        sage: rst += "-----------------\n"
        sage: rst += "\n"
        sage: rst += "Let's do easy computations with Sage::\n"
        sage: rst += "\n"
        sage: rst += "    s" + "age: 4 + 3\n"
        sage: rst += "    7\n"
        sage: rst += "    s" + "age: 1 - 2\n"
        sage: rst += "    -1\n"
        sage: rst += "\n"
        sage: rst += "Let's do `x^2`::\n"
        sage: rst += "\n"
        sage: rst += "    s" + "age: x^2\n"
        sage: rst += "    x^2\n"
        sage: from docutils.core import publish_string
        sage: html = publish_string(rst, writer_name='html')
        sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
        sage: p = docutilsHTMLProcessor()
        sage: txt = p.process_doc_html(html)
        sage: len(txt)
        191
        sage: print txt
        <h1 class="title">Additions in Sage</h1>
        <BLANKLINE>
        <BLANKLINE>
        <BLANKLINE>
        <p>Let's do easy computations with Sage:</p>
        <BLANKLINE>
        {{{id=0|
        4 + 3
        ///
        7
        }}}
        <BLANKLINE>
        {{{id=1|
        1 - 2
        ///
        -1
        }}}
        <BLANKLINE>
        <p>Let's do $x^2$:</p>
        <BLANKLINE>
        {{{id=2|
        x^2
        ///
        x^2
        }}}
        <BLANKLINE>
        <BLANKLINE>

    """
    def reset(self):
        r"""
        Put the processor back into its initial state.

        All flags are switched off, the collected pieces are emptied and
        the cell counter is set back to zero; afterwards the work is
        handed over to ``SGMLParser.reset``.  Called by
        :meth:`SGMLParser.__init__`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: d = docutilsHTMLProcessor()    #indirect doctest
            sage: d.keep_data
            False
            sage: d.in_pre_litteral_block
            False
            sage: d.in_div_footer_block
            False
            sage: d.temp_pieces
            []
            sage: d.all_pieces
            ''
            sage: d.cellcount
            0
        """
        # Flags.  keep_data starts switched off so that nothing located
        # before the <body> tag is kept.
        self.keep_data = self.in_pre_litteral_block = False
        self.in_div_footer_block = False

        # What the parser has kept so far.
        self.all_pieces = ''
        self.temp_pieces = []

        # Counter, starting from zero.
        self.cellcount = 0

        SGMLParser.reset(self)

    def false_positive_input_output_cell(self, cell_piece):
        r"""
        Return a false positive input-output cell as a literal block of
        html.

        A false positive input-output cell comes from a block of code which
        doesn't start with the sage prompt string 'sage:' or the Python
        prompt '>>>'.  The piece is put back between
        ``<pre class="literal-block">`` and ``</pre>``, and every curly
        brace gets a ``&nbsp;`` appended to it.

        INPUT:

        - ``cell_piece`` - string, a cell piece

        OUTPUT:

        string

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: s = "sage -rst2html -h"
            sage: print p.false_positive_input_output_cell(s)
            <pre class="literal-block">
            sage -rst2html -h
            </pre>

        Curly braces are each followed by ``&nbsp;``::

            sage: print p.false_positive_input_output_cell("{{{x}}}")
            <pre class="literal-block">
            {&nbsp;{&nbsp;{&nbsp;x}&nbsp;}&nbsp;}&nbsp;
            </pre>

        """
        html = '<pre class="literal-block">\n' + cell_piece
        # NOTE(review): presumably the padding keeps runs of braces from
        # being read as cell delimiters later on -- confirm.
        for brace, padded in (('{', '{&nbsp;'), ('}', '}&nbsp;')):
            html = html.replace(brace, padded)
        return html + '\n</pre>'
    
    #############################################
    ## Specific tag handlers
    ##
    # sage blocks
    def start_pre(self, attrs):
        r"""
        Handle an opening ``<pre>`` tag.

        When one of the attributes is ``class="literal-block"`` (compared
        case-insensitively), the flag ``in_pre_litteral_block`` is raised
        and the temporary pieces are handed off with ``'to_doc_pieces'``.
        Any other ``<pre>`` tag is forwarded to :meth:`unknown_starttag`.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'literal-block')]
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done bunch of tmp strings'
            sage: p.temp_pieces
            []
            sage: p.in_pre_litteral_block
            True

        ::

            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_pre(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<pre class="something-else">']
            sage: p.in_pre_litteral_block
            False
        """
        # Does this tag open a pre literal-block?
        opens_literal_block = any(
            key.lower() == 'class' and val.lower() == 'literal-block'
            for key, val in attrs)
        if opens_literal_block:
            # The flag is raised before the pending pieces are handed off.
            self.in_pre_litteral_block = True
            self.hand_off_temp_pieces('to_doc_pieces')
        else:
            self.unknown_starttag('pre', attrs)

    def end_pre(self):
        r"""
        Handle a closing ``</pre>`` tag.

        When a pre literal-block is open (``in_pre_litteral_block`` is
        true), the flag is lowered and the temporary pieces are handed off
        with ``'to_cell_pieces'``.  Otherwise the tag is forwarded to
        :meth:`unknown_endtag`.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'literal-block')]
            sage: p.start_pre(attrs)
            sage: sprompt = 'sa' + 'ge' + ': '    # to avoid problems with doctest script
            sage: p.handle_data('%s4+4\n8\n%sx^2\nx^2\n' % (sprompt, sprompt))
            sage: p.end_pre()
            sage: print p.all_pieces
            a lot of stuff done bunch of tmp strings
            {{{id=0|
            4+4
            ///
            8
            }}}
            <BLANKLINE>
            {{{id=1|
            x^2
            ///
            x^2
            }}}
            sage: p.temp_pieces
            []
            sage: p.in_pre_litteral_block
            False

        ::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = [('class', 'something-else')]
            sage: p.start_pre(attrs)
            sage: p.handle_data('some data')
            sage: p.end_pre()
            sage: print p.all_pieces
            a lot of stuff done
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '<pre class="something-else">', 'some data', '</pre>']
            sage: p.in_pre_litteral_block
            False
        """
        if not self.in_pre_litteral_block:
            # An ordinary <pre> element: treat the tag like any other.
            self.unknown_endtag('pre')
            return
        # Closing a literal block: the flag is lowered before the pieces
        # gathered inside the block are handed off.
        self.in_pre_litteral_block = False
        self.hand_off_temp_pieces('to_cell_pieces')

    # Ignore div tags (a footer div also switches data keeping off until it is closed)
    def start_div(self, attrs):
        r"""
        Handle an opening ``<div>`` tag.

        The tag itself is never kept.  When one of the attributes is
        ``class="footer"`` (compared case-insensitively), the temporary
        pieces are handed off with ``'to_doc_pieces'``, ``keep_data`` is
        switched off and ``in_div_footer_block`` is raised.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('class', 'document'), ('id', 'title')]
            sage: p.start_div(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
        """
        # Look for the attribute telling that a div footer block starts.
        for key, val in attrs:
            if key.lower() != 'class' or val.lower() != 'footer':
                continue
            # The pending pieces are handed off first, then data keeping
            # is switched off for the footer.
            self.hand_off_temp_pieces('to_doc_pieces')
            self.keep_data = False
            self.in_div_footer_block = True
            break

    def end_div(self):
        r"""
        Handle a closing ``</div>`` tag.

        The tag itself is never kept.  When a div footer block is open
        (``in_div_footer_block`` is true), the flag is lowered and
        ``keep_data`` is switched back on.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_div()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']

        Closing a footer block switches data keeping back on::

            sage: p.in_div_footer_block = True
            sage: p.end_div()
            sage: p.in_div_footer_block
            False
            sage: p.keep_data
            True
        """
        if not self.in_div_footer_block:
            # Not inside a footer: nothing to undo.
            return
        self.in_div_footer_block = False
        self.keep_data = True

    # latex role
    def start_cite(self, attrs):
        r"""
        Handle an opening ``<cite>`` tag by appending a dollar sign to the
        temporary pieces.

        The attributes are not used.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: attrs = []
            sage: p.start_cite(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '$']
        """
        # The tag is replaced by a dollar sign, as for end_cite.
        math_delimiter = "$"
        self.temp_pieces.append(math_delimiter)

    def end_cite(self):
        r"""
        Handle a closing ``</cite>`` tag by appending a dollar sign to the
        temporary pieces.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data = True
            sage: p.start_cite([])
            sage: p.handle_data('x^2')
            sage: p.end_cite()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings', '$', 'x^2', '$']
        """
        # The tag is replaced by a dollar sign, as for start_cite.
        math_delimiter = "$"
        self.temp_pieces.append(math_delimiter)

    # script (for example for mathjax)
    def start_script(self, attrs):
        r"""
        Handle an opening ``<script>`` tag by switching ``keep_data`` off.

        The attributes are not used and the tag itself is not kept.

        INPUT:

        - ``attrs`` - list of tuple

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: attrs = [('type', 'text/x-mathjax-config')]
            sage: p.start_script(attrs)
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data
            False
        """
        # Switched back on by end_script.
        self.keep_data = False

    def end_script(self):
        r"""
        Handle a closing ``</script>`` tag by switching ``keep_data`` on.

        The flag is set to ``True`` whatever its value was before the
        script started; the tag itself is not kept.

        EXAMPLES::

            sage: from sagenb.notebook.docHTMLProcessor import docutilsHTMLProcessor
            sage: p = docutilsHTMLProcessor()
            sage: p.all_pieces = 'a lot of stuff done '
            sage: p.temp_pieces = ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.end_script()
            sage: p.all_pieces
            'a lot of stuff done '
            sage: p.temp_pieces
            ['bunch ', 'of ', 'tmp ', 'strings']
            sage: p.keep_data
            True
        """
        # Counterpart of start_script, which switches the flag off.
        self.keep_data = True

